import numpy as np
import cv2 as cv
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import os

def process_data(path):
    """Load grayscale images from class sub-folders as flattened vectors.

    Every sub-directory of ``path`` is treated as one class. Sub-directories
    are visited in sorted name order and the position in that order becomes
    the integer label, so a given folder name maps to the same label on every
    call (``os.listdir`` on its own returns entries in arbitrary order, which
    could give two dataset splits different label mappings). Entries of
    ``path`` that are not directories are ignored.

    Note: the sort is lexicographic, so a folder named ``"10"`` would come
    before ``"2"``.

    Args:
        path: Directory containing one sub-directory per class.

    Returns:
        A ``(data, label)`` tuple of NumPy arrays: ``data`` holds one
        flattened image per entry and ``label`` the matching class index.

    Raises:
        OSError: If a file inside a class folder cannot be read as an image.
    """
    data = []
    label = []
    # Keep only directories and sort them so label indices are deterministic.
    class_folders = sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )
    for idx, folder in enumerate(class_folders):
        folder_path = os.path.join(path, folder)
        # Sorted file order keeps the sample order reproducible across runs.
        for file_name in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, file_name)
            img = cv.imread(file_path, 0)  # flag 0: load as grayscale
            if img is None:
                # imread signals failure by returning None instead of raising.
                raise OSError(f"Could not read image file: {file_path}")
            data.append(img.reshape(-1))
            label.append(idx)
    return np.array(data), np.array(label)

def train_and_evaluate(
    train_path='../MNIST/transformed/TRAIN/',
    test_path='../MNIST/transformed/TEST/',
    n_estimators=600,
    random_state=42,
    n_jobs=None,
):
    """Train a random forest on image folders and print evaluation results.

    Loads the training and test sets with ``process_data``, fits a
    ``RandomForestClassifier``, then prints the test accuracy, the confusion
    matrix, and the total number of decision (non-leaf) nodes summed over all
    trees in the forest.

    Args:
        train_path: Directory of training images, one sub-folder per class.
        test_path: Directory of test images, one sub-folder per class.
        n_estimators: Number of trees in the forest.
        random_state: Seed passed to the classifier for reproducibility.
        n_jobs: Passed through to ``RandomForestClassifier``; ``None`` keeps
            the library default.

    Returns:
        None. All results are written to standard output.
    """
    train_data, train_label = process_data(train_path)
    test_data, test_label = process_data(test_path)

    model = RandomForestClassifier(
        n_estimators=n_estimators,
        random_state=random_state,
        n_jobs=n_jobs,
    )
    model.fit(train_data, train_label)

    predictions = model.predict(test_data)
    accuracy = accuracy_score(test_label, predictions)
    print(f'Test accuracy: {accuracy:.4f}')

    cm = confusion_matrix(test_label, predictions)
    print("Confusion Matrix:")
    print(cm)

    # A decision node is any node that is not a leaf, so per tree the count
    # is the total node count minus the leaf count.
    total_nodes = sum(
        tree.tree_.node_count - tree.tree_.n_leaves
        for tree in model.estimators_
    )
    print("Total decision nodes in the forest:", total_nodes)

# Run the train/evaluate pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    train_and_evaluate()
